
###########################################################
##### Haiti elite network project  		          			#####
##### build family dataset	                      		#####
##### 2021 mar 03                   									#####
###########################################################


## 1. merge data on family attributes
## 2. merge data on product attributes
## 3. collapse to business families
## 4. collapse to all elite families


#####
## read in data
#####

## ownership (Stata file)
dat <- read.dta("01_Data/02_Clean/fam_biz_prod.dta")

## family-level inputs
## (as.is = TRUE: character columns stay character instead of becoming factors)
pol    <- read.csv("01_Data/02_Clean/polbios_fam.csv", as.is = TRUE)
cent   <- read.csv("01_Data/02_Clean/centrality.csv")
book   <- read.csv("01_Data/02_Clean/all_immig.csv")
cent50 <- read.csv("01_Data/02_Clean/centrality_1950.csv")
cent25 <- read.csv("01_Data/02_Clean/centrality_1925.csv")
coup   <- read.csv("01_Data/02_Clean/coup_fams.csv", as.is = TRUE)

## drop the column named X from each table (no-op where it is absent);
## presumably these are row names saved by an earlier write.csv -- TODO confirm
pol$X    <- NULL
cent$X   <- NULL
book$X   <- NULL
cent50$X <- NULL
cent25$X <- NULL
coup$X   <- NULL

## product-level inputs; the first line of the cpi file is skipped
prod <- read.dta("01_Data/02_Clean/product_chars.dta")
desc <- read.csv("01_Data/02_Clean/hs_4_desc.csv")
cpi  <- read.csv("01_Data/01_Raw/01_Prices/ihsi cpi 01_13 standard.csv", skip = 1)


#####
## merge family attributes
#####

## full outer join with the political data: families that appear in only one
## of the two tables are kept
dat2 <- merge(dat, pol, by.x = "fam", by.y = "name_last", all = TRUE)

## the remaining tables are left-joined onto the family key
dat2 <- merge(dat2, cent, by.x = "fam", by.y = "family", all.x = TRUE)
dat2 <- merge(dat2, book, by = "fam", all.x = TRUE)
dat2 <- merge(dat2, coup, by.x = "fam", by.y = "last_name", all.x = TRUE)
setnames(dat2, "coup_bi", "coup")

## make some vars 0 if missing
zero_fill <- c("pol", "mil", "coup_con", "coup", "syrian", "immig")
dat2[zero_fill] <- lapply(dat2[zero_fill], car::recode, recodes = "NA=0")

## make some vars binary (any value of 1 or more becomes 1)
binary_vars <- c("pol", "mil")
dat2[binary_vars] <- lapply(dat2[binary_vars], car::recode, recodes = "1:hi=1")

## drop rows whose family is the literal string "NA"; subset() also drops rows
## where the comparison itself is NA, so truly missing families go as well
dat2 <- subset(dat2, fam != "NA")


#####
## merge product attributes
#####

## collapse product data by hs_4: mean of each characteristic within a code,
## ignoring missing values
vars <- c("bulk_ln", "divis_ln", "ref_lib", "eos", "cshare", "time_con", "time_bin", "pci_value")
prod <- data.table(prod)
prod2 <- prod[,
  lapply(.SD, FUN = function(col) mean(col, na.rm = TRUE)),
  by = "hs_4",
  .SDcols = vars
]
prod2 <- data.frame(prod2)

## a mean over no non-missing values is NaN; store those as NA instead
prod2[vars] <- lapply(prod2[vars], function(col) car::recode(col, "NaN=NA"))

## merge product characteristics into family data
dat3 <- merge(dat2, prod2, by = "hs_4", all.x = TRUE)

## merge product descriptions into family data
## (hs_four is zero-padded to four characters to form the hs_4 key)
desc$hs_4 <- sprintf("%04d", desc$hs_four)
dat3 <- merge(dat3, desc[, c("hs_desc", "hs_4")], by = "hs_4", all.x = TRUE)

## make indicator for all inputs: 1 where cshare is missing;
## cshare2 is cshare with those missing values set to 0
dat3$noconsump <- ifelse(is.na(dat3$cshare), 1, 0)
dat3$cshare2 <- ifelse(dat3$noconsump == 1, 0, dat3$cshare)

## make family value weights from value of good divided by number of owners
dat3$share <- dat3$value_1 / dat3$nown

## make biz indicator: 1 if the family appears in the ownership data
dat3$biz <- as.numeric(dat3$fam %in% dat$fam)

## make cpi indicator: split the second cpi column on ", " into one flat vector
cpilist <- unlist(strsplit(as.character(cpi[, 2]), ", "))


#####
## merge in other periods of network variables
#####

## align the key name with dat3
setnames(cent50, "family", "fam")
setnames(cent25, "family", "fam")

## keep only the key and the single centrality measure from each period
cent50 <- cent50[, c("fam", "bonw_02_wnind.1950")]
cent25 <- cent25[, c("fam", "bonw_02_wnind.1925")]

## left joins: every row of dat3 is kept
dat3 <- merge(dat3, cent50, by = "fam", all.x = TRUE)
dat3 <- merge(dat3, cent25, by = "fam", all.x = TRUE)


#####
## calculate some logs, standardized, etc
#####

## natural logs
dat3$value_log <- log(dat3$value_1)
dat3$nind_log <- log(dat3$nind)

## columns that get a standardized "<name>_st" copy
vars <- c(
  "value_log", "ref_lib", "cshare", "cshare2", "pci_value", "time_con",
  "bulk_ln", "divis_ln",
  "degree_all_uw",
  "bon_02_uw", "bon_02_wnind", "bonw_02_wnind",
  "bonwbin_02_wnind",
  "bonw0212_02_wnind", "bonwautoc_02_wnind", "bonw_02_wnind2",
  "bonw_02_wnind_own0",
  paste0("bonw", 0:10, "_wnind_own0"),
  "bonwb_02_wnind", "bonwr_02_wnind", "bonwp_02_wnind", "bonw_02_uw",
  paste0("bonw", 0:10, "_wnind"),
  "bonw_02_wnind.1950", "bonw_02_wnind.1925"
)

## standardize() is defined outside this section; it is applied column by column
dat3[, paste0(vars, "_st")] <- apply(dat3[, vars], 2, standardize)

## save the family x product level data
write.csv(dat3, "01_Data/02_Clean/famprod.csv")



#####
## collapse to family
#####

## one row per family: share-weighted average of every selected column
## over that family's rows; rows with a missing share are dropped first
dat3 <- data.frame(dat3)
fam <- subset(
  dat3,
  subset = !is.na(share),
  select = c(
    fam, wgt_kg, wgt_pct, value_1, value_wo_02:value_logautoc,
    value_log_st, share,
    biz, mil, pol, coup, coup_con, immig, syrian,
    degree_all_uw, degree_coupall_wnind,
    nind, nind_all, nind_log, reachability, nclust, qual,
    bulk_ln:pci_value, noconsump, cshare2, ref_lib_st:divis_ln_st,
    bon_02_uw, bon_02_wnind, bonw_02_wnind, bonw_02_uw,
    bonwbin_02_wnind, bonw0212_02_wnind, bonwautoc_02_wnind,
    bonw_02_wnind2,
    bonw_02_wnind_own0, bonw0_wnind_own0:bonw10_wnind_own0,
    bonwb_02_wnind, bonwr_02_wnind, bonwp_02_wnind,
    bonw_02_wnind.1950, bonw_02_wnind.1925,
    bonw0_wnind:bonw10_wnind,
    bon_02_uw_st, bon_02_wnind_st, bonw_02_wnind_st, bonw_02_uw_st,
    bonwbin_02_wnind_st, bonw0212_02_wnind_st, bonwautoc_02_wnind_st,
    bonw_02_wnind2_st,
    bonwb_02_wnind_st, bonwr_02_wnind_st, bonwp_02_wnind_st,
    bonw_02_wnind.1950_st, bonw_02_wnind.1925_st,
    bonw0_wnind_st:bonw10_wnind_st,
    fastgreedy
  )
)
fam$share <- as.numeric(fam$share)
fam <- data.table(fam)

## na.rm = TRUE drops a column's own missing values (and their weights)
## before averaging
fam <- fam[, lapply(.SD, weighted.mean, w = share, na.rm = TRUE), by = "fam"]

## per family, the distinct product codes, descriptions and con_final values
## joined into "; "-separated strings
temp <- summaryBy(
  hs_4 + hs_desc + con_final ~ fam,
  data = dat3,
  FUN = function(vals) paste(unique(vals), collapse = "; ")
)
setnames(temp, names(temp), c("fam", "hs_4", "hs_desc", "con_final"))
fam <- merge(fam, temp, by = "fam", all.x = TRUE)


## identify most valuable products by family

  ## collapse to 2 digit HS code: total value per family x hs_2, plus each
  ## family's overall total and the hs_2 share of that total
temp2 <- dat3
temp2$hs_2 <- str_trunc(temp2$hs_4, 2, "right", ellipsis = "")
temp2 <- temp2 %>%
  group_by(fam, hs_2) %>%
  summarize(value_1 = sum(value_1, na.rm = TRUE)) %>%
  group_by(fam) %>%
  mutate(value_total = sum(value_1, na.rm = TRUE)) %>%
  ungroup()
temp2$value_prop <- temp2$value_1 / temp2$value_total

  ## keep hs_2 groups worth at least 1% of the family's total
  ## (rows where the proportion is NaN, i.e. total of 0, are dropped too)
temp2 <- subset(temp2, value_prop >= .01)

  ## merge in hs-2 descriptions
hs <- read.csv("01_Data/02_Clean/hs_2digit_desc.csv", fileEncoding = "UTF-8-BOM")
hs <- hs %>%
  mutate(hs_2 = str_pad(HS.Code, width = 2, side = "left", pad = "0"),
         hs_2_desc = Product.descriptions) %>%
  dplyr::select(starts_with("hs_"))

temp2 <- merge(temp2, hs, by = "hs_2", all.x = TRUE)

  ## merge() returns rows sorted by the key (hs_2), which would list each
  ## family's products in HS-code order. Sort by descending value *after*
  ## the merge so the collapsed strings run from most to least valuable.
temp2 <- temp2[order(temp2$fam, -temp2$value_1), ]

  ## collapse string descriptions (row order within a family is preserved)
temp3 <- temp2 %>%
  group_by(fam) %>%
  summarize(top_hs2 = paste(unique(hs_2), collapse = "; "),
            top_hs2_desc = paste(unique(hs_2_desc), collapse = "; "))

  ## merge into family data
fam <- merge(fam, temp3, by = "fam", all.x = TRUE)

write.csv(fam, "01_Data/02_Clean/fam.csv")
write.dta(fam, "01_Data/02_Clean/fam.dta")


#####
## make data for all fams (not just importers)
#####

## family-level columns only (nothing product-specific), so that unique()
## removes rows that are identical across all selected columns
all <- subset(
  dat3,
  select = c(
    fam, biz, mil, pol, coup, coup_con, immig, syrian, degree_all_uw,
    nind, nind_all, nind_log, reachability, nclust, qual,
    fastgreedy,
    degree_coupall_wnind,
    bon_02_uw, bon_02_wnind, bonw_02_wnind, bonw_02_uw,
    bonwbin_02_wnind,
    bonw0212_02_wnind, bonwautoc_02_wnind, bonw_02_wnind2,
    bonw_02_wnind_own0, bonw0_wnind_own0:bonw10_wnind_own0,
    bonwb_02_wnind, bonwr_02_wnind, bonwp_02_wnind,
    bonw0_wnind:bonw10_wnind,
    bonw_02_wnind.1950, bonw_02_wnind.1925,
    bon_02_uw_st, bon_02_wnind_st, bonw_02_wnind_st, bonw_02_uw_st,
    bonwbin_02_wnind_st,
    bonw0212_02_wnind_st, bonwautoc_02_wnind_st, bonw_02_wnind2_st,
    bonwb_02_wnind_st, bonwr_02_wnind_st, bonwp_02_wnind_st,
    bonw0_wnind_st:bonw10_wnind_st,
    bonw_02_wnind.1950_st, bonw_02_wnind.1925_st
  )
)
all <- unique(all)

## keep families flagged on at least one of biz, mil, pol
all <- subset(all, biz == 1 | mil == 1 | pol == 1)

write.csv(all, "01_Data/02_Clean/allfams.csv")
write.dta(all, "01_Data/02_Clean/allfams.dta")


#####
## collapse to product (hs_4)
#####

## taking the share-weighted average across families within each product
## one row per product (hs_4 + description): share-weighted average of every
## selected column; rows whose hs_4 is the string "NA" or "00NA" (or is
## missing) are excluded
prod <- subset(
  dat3,
  subset = hs_4 != "NA" & hs_4 != "00NA",
  select = c(
    hs_4, hs_desc, wgt_kg, wgt_pct, value_1, value_wo_02:value_logautoc,
    value_log_st, share,
    biz, mil, pol, coup, coup_con, immig, syrian, degree_all_uw,
    nind, nind_all, nind_log, reachability, nclust, qual,
    bulk_ln:pci_value, noconsump, cshare2, ref_lib_st:divis_ln_st,
    degree_coupall_wnind,
    bon_02_uw, bon_02_wnind, bonw_02_wnind, bonw_02_uw,
    bonwbin_02_wnind, bonw0212_02_wnind, bonwautoc_02_wnind,
    bonw_02_wnind2,
    bonw_02_wnind_own0, bonw0_wnind_own0:bonw10_wnind_own0,
    bonwb_02_wnind, bonwr_02_wnind, bonwp_02_wnind,
    bonw_02_wnind.1950, bonw_02_wnind.1925,
    bonw0_wnind:bonw10_wnind,
    bon_02_uw_st, bon_02_wnind_st, bonw_02_wnind_st, bonw_02_uw_st,
    bonwbin_02_wnind_st,
    bonw0212_02_wnind_st, bonwautoc_02_wnind_st, bonw_02_wnind2_st,
    bonwb_02_wnind_st, bonwr_02_wnind_st, bonwp_02_wnind_st,
    bonw_02_wnind.1950_st, bonw_02_wnind.1925_st,
    bonw0_wnind_st:bonw10_wnind_st
  )
)
prod$share <- as.numeric(prod$share)
prod <- data.table(prod)

## NOTE(review): rows with a missing share are not filtered out here, and
## weighted.mean() returns NA for a group if any of its weights is NA --
## confirm no such rows survive the hs_4 filter above
prod <- prod[,
  lapply(.SD, weighted.mean, w = share, na.rm = TRUE),
  by = c("hs_4", "hs_desc")
]

## per product, the distinct families and con_final values joined into
## "; "-separated strings
temp <- summaryBy(
  fam + con_final ~ hs_4,
  data = dat3,
  FUN = function(vals) paste(unique(vals), collapse = "; ")
)
setnames(temp, names(temp), c("hs_4", "fam", "con_final"))
prod <- merge(prod, temp, by = "hs_4", all.x = TRUE)

write.csv(prod, "01_Data/02_Clean/prod.csv")











